package com.ysq.excavator.service.impl;

import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.springframework.stereotype.Service;

import com.ysq.excavator.content.ContentManager;
import com.ysq.excavator.content.ExcavateContent;
import com.ysq.excavator.content.SplitWord;
import com.ysq.excavator.service.HtmlContentService;

@Service("htmlContentServiceImpl")
public class HtmlContentServiceImpl implements HtmlContentService {

	public void test(){
		
		System.out.println("1111");
	}
	
	/**
	 * 通过 url 获得正文
	 * @param url
	 * @return
	 */
	public String getUrlContent(String url){
		String contentText = null;
		try {
			ExcavateContent ec = new ExcavateContent(url);
			
			contentText = ec.getUrlContent();
		} catch (Exception e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		
		return contentText;
	}
	
	/**
	 * 将正文信息分词
	 * @param contentText
	 * @return
	 */
	public List<SplitWord> splitContentWord(String contentText){
		
		List<SplitWord> wordList = ContentManager.splitContentWord(contentText);
		
		return wordList;
	}
	
	/**
	 * 过滤正文分词
	 * @param contentText
	 * @return
	 */
	public List<SplitWord> filterSplitWord(String contentText){
		List<SplitWord> wordList = ContentManager.filterSplitWord(contentText);
		
		return wordList;
	}
}
